R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
## ── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 1.0.2
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.2
## ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
SIAP <- read.csv("/Users/erikaluna/R\ Studio/msc_thesis/SIAP.csv")
one_crop <- SIAP %>%
filter(crop == "maiz") %>%
group_by(year, state) %>%
summarise(ag_yield = round(sum(production)/sum(harvested), digits = 2),
ag_prod = sum(production),
ag_planted = sum(planted),
ag_harv = sum(harvested),
ag_losses = sum(losses))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
one_crop %>%
DT::datatable()
## Number of observations
## `summarise()` ungrouping output (override with `.groups` argument)
## Panel data A data frama with all years and all states that grow maize.
period <- tibble(rep(c(1980:2016), times = 32)) #32 states report maize production
colnames(period) <- c("year")
states <- tibble(rep(c("aguascalientes","baja california","baja california sur",
"campeche", "chiapas", "chihuahua","coahuila","colima",
"distrito federal","durango",
"guanajuato", "guerrero", "hidalgo", "jalisco",
"mexico", "michoacan", "morelos", "nayarit","nuevo leon",
"oaxaca", "puebla", "queretaro", "quintana roo",
"san luis potosi", "sinaloa", "sonora", "tabasco",
"tamaulipas", "tlaxcala","veracruz", "yucatan", "zacatecas"), times = 37))
colnames(states) <- c("state")
states <- states %>%
arrange(state)
states_period <- cbind(states, period)
Descriptive Statistics
describe(maize$ag_yield)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 1116 3.48 3.29 2.49 2.92 1.56 0.26 39.87 39.61 4.53 35.9
## se
## X1 0.1
summary(maize)
## state year ag_yield ag_prod
## Length:1184 Min. :1980 Min. : 0.260 Min. : 25
## Class :character 1st Qu.:1989 1st Qu.: 1.670 1st Qu.: 105683
## Mode :character Median :1998 Median : 2.485 Median : 359930
## Mean :1998 Mean : 3.484 Mean : 737542
## 3rd Qu.:2007 3rd Qu.: 4.320 3rd Qu.: 979578
## Max. :2016 Max. :39.870 Max. :8337076
## NA's :68 NA's :68
## ag_planted ag_harv ag_losses i
## Min. : 6.5 Min. : 6.5 Min. : 0 Min. : 1.00
## 1st Qu.: 61926.8 1st Qu.: 46913.2 1st Qu.: 2682 1st Qu.: 8.75
## Median :162533.1 Median :138180.0 Median : 16725 Median :16.50
## Mean :263319.8 Mean :231465.7 Mean : 32226 Mean :16.50
## 3rd Qu.:465139.0 3rd Qu.:405935.0 3rd Qu.: 44643 3rd Qu.:24.25
## Max. :988367.2 Max. :960143.8 Max. :416498 Max. :32.00
## NA's :66 NA's :67 NA's :83
## t
## Min. : 1
## 1st Qu.:10
## Median :19
## Mean :19
## 3rd Qu.:28
## Max. :37
##
ggplot(maize) +
aes(x = ag_yield) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 68 rows containing non-finite values (stat_bin).

ggplot(maize) +
aes(x = ag_yield) +
geom_density()
## Warning: Removed 68 rows containing non-finite values (stat_density).

maize %>%
group_by(state) %>%
summarize(max_yield = max(ag_yield, na.rm=TRUE),
min_yield = min(ag_yield, na.rm=TRUE),
range_yield = max(ag_yield, na.rm=TRUE) - min(ag_yield, na.rm=TRUE),
sd_yield = sd(ag_yield, na.rm=TRUE),
mean_yield = mean(ag_yield, na.rm=TRUE),
median_yield = median(ag_yield, na.rm=TRUE)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
39.87 |
2.00 |
37.87 |
7.2798377 |
11.8245714 |
10.59 |
| baja california |
38.54 |
0.98 |
37.56 |
8.5605573 |
7.2329032 |
3.65 |
| baja california sur |
7.41 |
2.64 |
4.77 |
1.3394085 |
5.2271429 |
5.24 |
| campeche |
2.74 |
0.58 |
2.16 |
0.5573117 |
1.5691429 |
1.56 |
| chiapas |
2.56 |
1.45 |
1.11 |
0.2942522 |
1.9654286 |
1.90 |
| chihuahua |
9.97 |
0.57 |
9.40 |
2.4569107 |
5.1268571 |
4.85 |
| coahuila |
18.86 |
1.07 |
17.79 |
3.7133300 |
5.1442857 |
4.61 |
| colima |
10.51 |
1.76 |
8.75 |
2.1389538 |
4.0705714 |
3.12 |
| distrito federal |
11.00 |
1.97 |
9.03 |
1.4986115 |
2.7760000 |
2.48 |
| durango |
12.23 |
1.14 |
11.09 |
2.4056917 |
3.8565714 |
3.48 |
| guanajuato |
5.90 |
1.56 |
4.34 |
1.3113085 |
3.1977143 |
2.76 |
| guerrero |
3.06 |
0.97 |
2.09 |
0.4969693 |
2.2025714 |
2.26 |
| hidalgo |
3.46 |
1.56 |
1.90 |
0.5767255 |
2.2722857 |
2.21 |
| jalisco |
10.60 |
3.16 |
7.44 |
2.0650197 |
6.1022857 |
5.63 |
| mexico |
6.51 |
2.55 |
3.96 |
1.0881238 |
4.3457143 |
4.56 |
| michoacan |
4.15 |
1.46 |
2.69 |
0.7386203 |
2.5660000 |
2.31 |
| morelos |
3.50 |
0.91 |
2.59 |
0.7066578 |
2.3077143 |
2.10 |
| nayarit |
8.78 |
1.91 |
6.87 |
1.6842245 |
3.7422857 |
2.95 |
| nuevo leon |
11.06 |
1.00 |
10.06 |
2.2887925 |
3.3737143 |
2.64 |
| oaxaca |
1.54 |
0.92 |
0.62 |
0.1389323 |
1.2651429 |
1.26 |
| puebla |
2.79 |
1.13 |
1.66 |
0.3887487 |
1.9354286 |
1.99 |
| queretaro |
15.83 |
0.95 |
14.88 |
3.1923000 |
4.7117143 |
3.86 |
| quintana roo |
0.99 |
0.26 |
0.73 |
0.1616054 |
0.6131429 |
0.60 |
| san luis potosi |
1.75 |
0.81 |
0.94 |
0.2563096 |
1.2337143 |
1.18 |
| sinaloa |
10.54 |
1.03 |
9.51 |
3.0795467 |
5.7425714 |
6.14 |
| sonora |
7.05 |
2.77 |
4.28 |
1.1458762 |
4.6897143 |
4.69 |
| tabasco |
2.35 |
1.19 |
1.16 |
0.1957906 |
1.6331429 |
1.62 |
| tamaulipas |
4.94 |
1.56 |
3.38 |
0.9109864 |
2.9554286 |
2.74 |
| tlaxcala |
5.66 |
1.69 |
3.97 |
1.3930423 |
3.4051429 |
2.69 |
| veracruz |
2.44 |
1.47 |
0.97 |
0.2583350 |
1.8357143 |
1.84 |
| yucatan |
1.71 |
0.49 |
1.22 |
0.1963811 |
0.8885714 |
0.89 |
| zacatecas |
8.98 |
0.74 |
8.24 |
1.7751668 |
2.0965714 |
1.34 |
Plots
Production
maize %>%
group_by(state) %>%
summarise(max_prod = max(ag_prod, na.rm=T),
min_prod = min(ag_prod, na.rm=T),
range_prod = max(ag_prod, na.rm=T) - min(ag_prod, na.rm=T),
sd_prod = sd(ag_prod, na.rm=T),
mean_prod = mean(ag_prod, na.rm=T),
median_prod = median(ag_prod, na.rm=T)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
1493778.03 |
120108.00 |
1373670.03 |
421743.447 |
752373.75 |
715440.57 |
| baja california |
68442.00 |
25.00 |
68417.00 |
16092.783 |
18277.56 |
13658.00 |
| baja california sur |
97642.00 |
6634.00 |
91008.00 |
27614.561 |
37919.32 |
31540.65 |
| campeche |
464714.94 |
12839.00 |
451875.94 |
135606.273 |
175362.01 |
133041.15 |
| chiapas |
2135550.08 |
983415.00 |
1152135.08 |
282788.096 |
1461322.34 |
1460524.00 |
| chihuahua |
2319376.63 |
238326.00 |
2081050.63 |
550629.484 |
1211653.85 |
1125302.76 |
| coahuila |
937497.77 |
21696.96 |
915800.81 |
193974.135 |
184256.92 |
132786.00 |
| colima |
145185.85 |
46313.30 |
98872.55 |
21054.258 |
83773.46 |
77459.00 |
| distrito federal |
50161.00 |
9515.15 |
40645.85 |
9107.056 |
22218.15 |
23383.00 |
| durango |
2773265.28 |
194100.43 |
2579164.85 |
530611.100 |
689347.17 |
496439.00 |
| guanajuato |
2337499.61 |
383021.00 |
1954478.61 |
472100.685 |
1033940.03 |
897201.00 |
| guerrero |
1428121.17 |
331411.00 |
1096710.17 |
269719.455 |
1017910.16 |
1038965.35 |
| hidalgo |
879327.76 |
233929.00 |
645398.76 |
154093.490 |
519376.35 |
513798.00 |
| jalisco |
8337075.94 |
2420683.00 |
5916392.94 |
1453557.271 |
4451739.90 |
4053390.00 |
| mexico |
3610125.45 |
1037491.00 |
2572634.45 |
574154.477 |
2602864.79 |
2600755.36 |
| michoacan |
1935286.73 |
606327.00 |
1328959.73 |
340786.696 |
1173786.27 |
1130893.00 |
| morelos |
122714.05 |
29214.00 |
93500.05 |
20841.622 |
85761.65 |
90722.50 |
| nayarit |
439237.43 |
103011.00 |
336226.43 |
78919.814 |
219908.24 |
208767.21 |
| nuevo leon |
880313.40 |
30372.90 |
849940.50 |
176381.775 |
181841.54 |
128472.00 |
| oaxaca |
851011.00 |
220535.00 |
630476.00 |
164203.079 |
607910.18 |
645255.35 |
| puebla |
1417706.20 |
522662.00 |
895044.20 |
227714.433 |
1005639.22 |
1074593.00 |
| queretaro |
1035148.30 |
58909.00 |
976239.30 |
321939.078 |
423422.56 |
335106.00 |
| quintana roo |
67469.92 |
4160.32 |
63309.60 |
16919.277 |
31771.94 |
33769.56 |
| san luis potosi |
256366.00 |
68751.00 |
187615.00 |
39217.713 |
167479.30 |
172358.00 |
| sinaloa |
6462324.35 |
64754.00 |
6397570.35 |
1910634.563 |
2297744.80 |
2449096.00 |
| sonora |
841582.00 |
46513.00 |
795069.00 |
179725.857 |
235612.83 |
179494.74 |
| tabasco |
181556.66 |
53710.00 |
127846.66 |
29035.967 |
106481.81 |
104467.40 |
| tamaulipas |
1356220.00 |
153360.81 |
1202859.19 |
285901.402 |
612883.77 |
557779.01 |
| tlaxcala |
733642.80 |
134414.00 |
599228.80 |
168570.419 |
441059.10 |
407526.00 |
| veracruz |
1360234.55 |
569513.00 |
790721.55 |
222325.058 |
967546.76 |
966462.62 |
| yucatan |
160737.44 |
8924.00 |
151813.44 |
40031.634 |
109519.68 |
120614.85 |
| zacatecas |
2689221.34 |
221965.00 |
2467256.34 |
508583.271 |
588443.59 |
389535.00 |
maize %>%
ggplot(aes(state, ag_prod)) +
geom_boxplot() +
ylab("Production (tonnes)") +
xlab("State") +
#scale_y_continuous(labels = comma) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 68 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>%
group_by(state) %>%
summarise(obs = sum(!is.na(ag_prod)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>%
filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
## state obs
## <chr> <int>
## 1 aguascalientes 35
## 2 baja california sur 35
## 3 campeche 35
## 4 chiapas 35
## 5 chihuahua 35
## 6 coahuila 35
## 7 colima 35
## 8 distrito federal 35
## 9 durango 35
## 10 guanajuato 35
## # … with 21 more rows
maize_ts <- maize %>%
ggplot(aes(year, ag_prod)) +
geom_line()+
ylab("Production (tonnes)") +
xlab("Years") +
ggtitle("maize Production 1980 - 2016") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
#facet_wrap(~state, ncol=5)
maize_ts

Yield
maize %>%
group_by(state) %>%
summarise(max_yield = max(ag_yield, na.rm=T),
min_yield = min(ag_yield, na.rm=T),
range_yield = max(ag_yield, na.rm=T) - min(ag_yield, na.rm=T),
sd_yield = sd(ag_yield, na.rm=T),
mean_yield = mean(ag_yield, na.rm=T),
median_yield = median(ag_yield, na.rm=T)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
39.87 |
2.00 |
37.87 |
7.2798377 |
11.8245714 |
10.59 |
| baja california |
38.54 |
0.98 |
37.56 |
8.5605573 |
7.2329032 |
3.65 |
| baja california sur |
7.41 |
2.64 |
4.77 |
1.3394085 |
5.2271429 |
5.24 |
| campeche |
2.74 |
0.58 |
2.16 |
0.5573117 |
1.5691429 |
1.56 |
| chiapas |
2.56 |
1.45 |
1.11 |
0.2942522 |
1.9654286 |
1.90 |
| chihuahua |
9.97 |
0.57 |
9.40 |
2.4569107 |
5.1268571 |
4.85 |
| coahuila |
18.86 |
1.07 |
17.79 |
3.7133300 |
5.1442857 |
4.61 |
| colima |
10.51 |
1.76 |
8.75 |
2.1389538 |
4.0705714 |
3.12 |
| distrito federal |
11.00 |
1.97 |
9.03 |
1.4986115 |
2.7760000 |
2.48 |
| durango |
12.23 |
1.14 |
11.09 |
2.4056917 |
3.8565714 |
3.48 |
| guanajuato |
5.90 |
1.56 |
4.34 |
1.3113085 |
3.1977143 |
2.76 |
| guerrero |
3.06 |
0.97 |
2.09 |
0.4969693 |
2.2025714 |
2.26 |
| hidalgo |
3.46 |
1.56 |
1.90 |
0.5767255 |
2.2722857 |
2.21 |
| jalisco |
10.60 |
3.16 |
7.44 |
2.0650197 |
6.1022857 |
5.63 |
| mexico |
6.51 |
2.55 |
3.96 |
1.0881238 |
4.3457143 |
4.56 |
| michoacan |
4.15 |
1.46 |
2.69 |
0.7386203 |
2.5660000 |
2.31 |
| morelos |
3.50 |
0.91 |
2.59 |
0.7066578 |
2.3077143 |
2.10 |
| nayarit |
8.78 |
1.91 |
6.87 |
1.6842245 |
3.7422857 |
2.95 |
| nuevo leon |
11.06 |
1.00 |
10.06 |
2.2887925 |
3.3737143 |
2.64 |
| oaxaca |
1.54 |
0.92 |
0.62 |
0.1389323 |
1.2651429 |
1.26 |
| puebla |
2.79 |
1.13 |
1.66 |
0.3887487 |
1.9354286 |
1.99 |
| queretaro |
15.83 |
0.95 |
14.88 |
3.1923000 |
4.7117143 |
3.86 |
| quintana roo |
0.99 |
0.26 |
0.73 |
0.1616054 |
0.6131429 |
0.60 |
| san luis potosi |
1.75 |
0.81 |
0.94 |
0.2563096 |
1.2337143 |
1.18 |
| sinaloa |
10.54 |
1.03 |
9.51 |
3.0795467 |
5.7425714 |
6.14 |
| sonora |
7.05 |
2.77 |
4.28 |
1.1458762 |
4.6897143 |
4.69 |
| tabasco |
2.35 |
1.19 |
1.16 |
0.1957906 |
1.6331429 |
1.62 |
| tamaulipas |
4.94 |
1.56 |
3.38 |
0.9109864 |
2.9554286 |
2.74 |
| tlaxcala |
5.66 |
1.69 |
3.97 |
1.3930423 |
3.4051429 |
2.69 |
| veracruz |
2.44 |
1.47 |
0.97 |
0.2583350 |
1.8357143 |
1.84 |
| yucatan |
1.71 |
0.49 |
1.22 |
0.1963811 |
0.8885714 |
0.89 |
| zacatecas |
8.98 |
0.74 |
8.24 |
1.7751668 |
2.0965714 |
1.34 |
maize %>%
ggplot(aes(state, ag_yield)) +
geom_boxplot() +
ylab("Yield (tonnes/ha)") +
xlab("State") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 68 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>%
group_by(state) %>%
summarise(obs = sum(!is.na(ag_yield)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>%
filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
## state obs
## <chr> <int>
## 1 aguascalientes 35
## 2 baja california sur 35
## 3 campeche 35
## 4 chiapas 35
## 5 chihuahua 35
## 6 coahuila 35
## 7 colima 35
## 8 distrito federal 35
## 9 durango 35
## 10 guanajuato 35
## # … with 21 more rows
maize_ts <- maize %>%
ggplot(aes(year, ag_yield)) +
geom_line()+
ylab("Yield (tonnes/ha)") +
xlab("Years") +
ggtitle("maize Yields 1980 - 2016") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
#facet_wrap(~state, ncol=5)
maize_ts

Area
maize %>%
group_by(state) %>%
summarise(max_area = max(ag_harv, na.rm=T),
min_area = min(ag_harv, na.rm=T),
range_area = max(ag_harv, na.rm=T) - min(ag_harv, na.rm=T),
sd_area = sd(ag_harv, na.rm=T),
mean_area = mean(ag_harv, na.rm=T),
median_area = median(ag_harv, na.rm=T)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
107831.0 |
11862.00 |
95969.00 |
28937.729 |
68341.029 |
66869.0 |
| baja california |
18469.0 |
6.50 |
18462.50 |
4305.230 |
3838.984 |
2510.0 |
| baja california sur |
21908.0 |
1550.00 |
20358.00 |
6358.061 |
7633.316 |
4981.0 |
| campeche |
186293.0 |
22196.00 |
164097.00 |
50751.509 |
97657.640 |
104431.0 |
| chiapas |
960143.8 |
504332.00 |
455811.84 |
120143.243 |
746915.560 |
702700.0 |
| chihuahua |
467063.0 |
78410.00 |
388653.00 |
89282.790 |
257589.847 |
259377.0 |
| coahuila |
54916.0 |
14750.55 |
40165.45 |
11165.860 |
33169.454 |
32642.0 |
| colima |
43703.0 |
11920.00 |
31783.00 |
10029.121 |
24591.167 |
24109.0 |
| distrito federal |
14384.0 |
2160.00 |
12224.00 |
3184.903 |
8485.669 |
8136.0 |
| durango |
235521.0 |
94009.76 |
141511.24 |
36710.573 |
177580.652 |
181708.0 |
| guanajuato |
455055.6 |
147933.00 |
307122.58 |
77625.786 |
327466.658 |
336557.0 |
| guerrero |
513566.0 |
341752.00 |
171814.00 |
41750.341 |
457701.783 |
466473.0 |
| hidalgo |
280490.0 |
105952.00 |
174538.00 |
32671.741 |
228483.923 |
233995.2 |
| jalisco |
916264.0 |
590515.49 |
325748.51 |
68458.874 |
738114.661 |
748010.7 |
| mexico |
746816.0 |
360047.60 |
386768.40 |
80984.400 |
607378.266 |
605970.5 |
| michoacan |
561059.0 |
355787.48 |
205271.52 |
41065.500 |
458596.557 |
464983.0 |
| morelos |
57820.0 |
19452.00 |
38368.00 |
10091.074 |
38934.708 |
39360.0 |
| nayarit |
107741.0 |
43213.25 |
64527.75 |
14931.730 |
61620.539 |
57382.0 |
| nuevo leon |
94674.3 |
12611.80 |
82062.50 |
20017.335 |
52610.726 |
54303.0 |
| oaxaca |
598942.5 |
239930.00 |
359012.52 |
92999.724 |
473285.145 |
485977.3 |
| puebla |
629374.0 |
309824.31 |
319549.69 |
75510.784 |
521914.969 |
541182.0 |
| queretaro |
123355.9 |
43143.00 |
80212.90 |
23555.979 |
86984.931 |
88259.0 |
| quintana roo |
85575.0 |
9660.00 |
75915.00 |
20425.422 |
49413.715 |
49868.0 |
| san luis potosi |
218808.1 |
46456.00 |
172352.15 |
47219.335 |
143351.988 |
146118.0 |
| sinaloa |
613197.3 |
46119.00 |
567078.29 |
180695.804 |
311682.077 |
363936.0 |
| sonora |
172528.0 |
15090.00 |
157438.00 |
38179.905 |
51395.407 |
35458.0 |
| tabasco |
105023.0 |
30880.00 |
74143.00 |
18947.972 |
66039.302 |
67377.0 |
| tamaulipas |
504704.0 |
98155.25 |
406548.75 |
100911.694 |
213186.951 |
198205.0 |
| tlaxcala |
161104.0 |
73478.00 |
87626.00 |
18408.116 |
131840.288 |
131124.0 |
| veracruz |
647535.4 |
338706.00 |
308829.40 |
74599.616 |
522976.031 |
536845.0 |
| yucatan |
166785.0 |
13128.00 |
153657.00 |
40496.663 |
123131.349 |
139404.0 |
| zacatecas |
442043.0 |
101513.60 |
340529.40 |
79128.893 |
295477.558 |
297417.5 |
maize %>%
ggplot(aes(state, ag_harv)) +
geom_boxplot() +
ylab("Area (tonnes)") +
xlab("State") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 67 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>%
group_by(state) %>%
summarise(obs = sum(!is.na(ag_harv)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>%
filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
## state obs
## <chr> <int>
## 1 aguascalientes 35
## 2 baja california sur 35
## 3 campeche 35
## 4 chiapas 35
## 5 chihuahua 35
## 6 coahuila 35
## 7 colima 35
## 8 distrito federal 35
## 9 durango 35
## 10 guanajuato 35
## # … with 21 more rows
maize_ts <- maize %>%
ggplot(aes(year, ag_harv)) +
geom_line()+
ylab("Area harvested (ha)") +
xlab("Years") +
ggtitle("maize - Area Harvested (ha) 1980 - 2016") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
#facet_wrap(~state, ncol=5)
maize_ts

Losses
maize %>%
group_by(state) %>%
summarise(max_losses = max(ag_losses, na.rm=T),
min_losses = min(ag_losses, na.rm=T),
range_losses = max(ag_losses, na.rm=T) - min(ag_losses, na.rm=T),
sd_losses = sd(ag_losses, na.rm=T),
mean_losses = mean(ag_losses, na.rm=T),
median_losses = median(ag_losses, na.rm=T)) %>%
knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| aguascalientes |
98179.0 |
838.0 |
97341.0 |
26988.5843 |
34821.7794 |
32473.500 |
| baja california |
3937.0 |
0.0 |
3937.0 |
874.2048 |
652.6129 |
327.000 |
| baja california sur |
4479.0 |
0.0 |
4479.0 |
758.1018 |
511.7162 |
295.750 |
| campeche |
102704.0 |
603.0 |
102101.0 |
24488.9392 |
20843.0917 |
11390.000 |
| chiapas |
120142.0 |
0.0 |
120142.0 |
30016.9511 |
28319.8903 |
19373.000 |
| chihuahua |
120423.0 |
0.0 |
120423.0 |
37834.4437 |
43595.8477 |
31065.000 |
| coahuila |
26530.0 |
4.5 |
26525.5 |
7538.9236 |
12510.2131 |
12368.000 |
| colima |
11297.0 |
0.0 |
11297.0 |
2560.0987 |
1852.5429 |
849.500 |
| distrito federal |
3083.0 |
0.0 |
3083.0 |
633.4964 |
334.8771 |
21.500 |
| durango |
116948.0 |
734.0 |
116214.0 |
30675.0632 |
29733.5677 |
16725.000 |
| guanajuato |
315239.0 |
5352.0 |
309887.0 |
70134.1731 |
94967.8444 |
68616.000 |
| guerrero |
118839.0 |
1971.0 |
116868.0 |
27630.4202 |
20315.9821 |
10258.250 |
| hidalgo |
147952.0 |
4178.9 |
143773.1 |
34477.2629 |
43952.7397 |
33819.850 |
| jalisco |
201237.0 |
0.0 |
201237.0 |
45142.1965 |
51988.8726 |
35475.500 |
| mexico |
284474.0 |
0.0 |
284474.0 |
56461.4675 |
31347.6224 |
13341.000 |
| michoacan |
134149.0 |
6713.0 |
127436.0 |
29750.4589 |
35081.6806 |
26622.500 |
| morelos |
36077.0 |
0.0 |
36077.0 |
6468.7874 |
2406.3403 |
133.000 |
| nayarit |
11254.0 |
0.0 |
11254.0 |
2878.9156 |
2337.1486 |
1261.000 |
| nuevo leon |
81924.0 |
404.0 |
81520.0 |
23709.7851 |
29212.2491 |
30140.000 |
| oaxaca |
187341.0 |
1010.0 |
186331.0 |
51409.7368 |
65867.4032 |
43324.500 |
| puebla |
292870.2 |
4240.0 |
288630.2 |
74611.8889 |
79630.7631 |
48354.000 |
| queretaro |
74808.4 |
0.0 |
74808.4 |
20777.9119 |
25499.7086 |
18273.000 |
| quintana roo |
64672.0 |
350.0 |
64322.0 |
19000.7413 |
21795.2380 |
17931.000 |
| san luis potosi |
152662.2 |
37590.0 |
115072.2 |
35708.2068 |
90717.3109 |
95791.500 |
| sinaloa |
416497.8 |
5046.0 |
411451.8 |
71139.0756 |
47302.9776 |
27423.500 |
| sonora |
24422.0 |
0.0 |
24422.0 |
5689.9027 |
4264.8015 |
2649.000 |
| tabasco |
34128.0 |
242.0 |
33886.0 |
8200.7811 |
8798.5000 |
5850.625 |
| tamaulipas |
88012.0 |
8466.0 |
79546.0 |
21456.4533 |
38187.3582 |
34609.150 |
| tlaxcala |
71459.0 |
0.0 |
71459.0 |
15066.7309 |
8802.8491 |
1498.000 |
| veracruz |
117580.0 |
5149.0 |
112431.0 |
29292.9785 |
54707.6020 |
49240.000 |
| yucatan |
171644.0 |
0.0 |
171644.0 |
45117.3412 |
32991.3453 |
13879.500 |
| zacatecas |
180285.0 |
1773.0 |
178512.0 |
53379.0982 |
68336.0388 |
69056.000 |
maize %>%
ggplot(aes(state, ag_losses)) +
geom_boxplot() +
ylab("Losses (ha)") +
xlab("State") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 83 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>%
group_by(state) %>%
summarise(obs = sum(!is.na(ag_losses)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>%
filter(obs > 34)
maize_complete
## # A tibble: 16 x 2
## state obs
## <chr> <int>
## 1 campeche 35
## 2 chiapas 35
## 3 chihuahua 35
## 4 coahuila 35
## 5 colima 35
## 6 distrito federal 35
## 7 durango 35
## 8 hidalgo 35
## 9 morelos 35
## 10 nayarit 35
## 11 nuevo leon 35
## 12 puebla 35
## 13 queretaro 35
## 14 quintana roo 35
## 15 tlaxcala 35
## 16 veracruz 35
maize_ts <- maize %>%
ggplot(aes(year, ag_losses)) +
geom_line()+
ylab("Losses (ha)") +
xlab("Years") +
ggtitle("maize - Losses (planted-harvested) (ha) 1980 - 2016") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
#facet_wrap(~state, ncol=5)
maize_ts

Regression line
maize_lm <- maize %>%
ggplot(aes(year, ag_yield)) +
geom_point() +
geom_smooth(method = "lm", se = T) +
ylab("Yield (tonnes/ha)") +
xlab("Years") +
ggtitle("Maize Yields") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
geom_rect(data = subset(maize, state %in% c(maize_complete$state)),
fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
ymin = -Inf,ymax = Inf) +
facet_wrap(~state, scales="free_y", ncol=5)
maize_lm
## Warning: Removed 68 rows containing non-finite values (stat_smooth).
## Warning: Removed 68 rows containing missing values (geom_point).
